pacman::p_load(dplyr,readr,lubridate,tidyr,stringr)
rm(list=ls())
################################################################################

###################################################### Crop prices

#################### Producer crop prices

croplist <- c("Soya beans", "Maize (corn)", "Wheat", "Sunflower seed")
cropabb <- c("soy", "maize", "wheat", "sunflower")

#Prices
df <- read_csv("../../data/prices/raw/faostat/argentina_producer_prices_index.csv",show_col_types = FALSE)
df <- df %>% filter(Element == "Producer Price (USD/tonne)") %>%
  dplyr::select(country = Area, item = Item, year = Year, price = Value) %>%
  filter(country == "Argentina", item %in% croplist)
dfy <- NULL
for (n in seq_along(croplist)) {
  crop <- croplist[n]
  dfx <- df %>%
    filter(item == crop) %>% 
    dplyr::select(year, price) %>%  
    rename(!!paste0(cropabb[n], "_price_fg") := price)
  if (is.null(dfy)) {
    dfy <- dfx
  } else {
    dfy <- inner_join(dfy, dfx, by = "year")
  }
}
df_base <- dfy

#Price index 
df <- read_csv("../../data/prices/raw/faostat/argentina_producer_prices_index.csv",show_col_types = FALSE)
df <- df %>%
  filter(Element == "Producer Price Index (2014-2016 = 100)") %>%
  dplyr::select(country = Area, item = Item, year = Year, price = Value) %>%
  filter(country == "Argentina", item %in% croplist)
dfy <- NULL
for (n in seq_along(croplist)) {
  crop <- croplist[n]
  dfx <- df %>%
    filter(item == crop) %>%
    dplyr::select(year, price) %>%
    rename(!!paste0(cropabb[n], "_price_fg") := price)
  if (is.null(dfy)) {
    dfy <- dfx
  } else {
    dfy <- inner_join(dfy, dfx, by = "year")
  }
}
df_index <- dfy

#Prices in USD: 2014-2016 index, FRED world average 2014-2016, and export taxes
df_index <- df_index %>%  mutate(soy_price_fg = soy_price_fg * 0.65 * 389 / 100,
                                 maize_price_fg = maize_price_fg * 0.8 * 174 / 100,
                                 wheat_price_fg = wheat_price_fg * 0.8 * 191 / 100,
                                 sunflower_price_fg = sunflower_price_fg * 0.8 * 1037 / 100)

##Prices in USD: pre-2015
croplist <- c("Soybeans", "Maize", "Wheat", "Sunflower seed")
cropabb <- c("soy", "maize", "wheat", "sunflower")
df <- read_csv("../../data/trade/raw/world_producer_prices.csv", show_col_types = FALSE) %>%
  filter(Element == "Producer Price (USD/tonne)") %>%
  dplyr::select(country = Area, item = Item, year = Year, price = Value) %>%
  filter(country == "Argentina", item %in% croplist)
dfy <- NULL
for (i in seq_along(croplist)) {
  crop <- croplist[i]
  abbr <- cropabb[i]
  dfx <- df %>% filter(item == crop) %>% dplyr::select(year, !!paste0(abbr, "_price_fg") := price)
  dfy <- if (is.null(dfy)) dfx else merge(dfy, dfx, by = "year")
}
df_final <- bind_rows(dfy, df_index %>% filter(year > 2014))
write_csv(df_final, "../../data/prices/clean/farmgate_crops_argentina_nation.csv.gz")


#################### County-level producer prices

#Time series
dft <- read_csv("../../data/prices/clean/farmgate_crops_argentina_nation.csv.gz", show_col_types = FALSE)

#Cross section
df <- read_csv("../../data/agribusiness/clean/soybean_argentina.csv.gz", show_col_types = FALSE) %>%
  mutate(price = fob_usd / equivalent_tonnes) %>% filter(price > 0) %>% dplyr::select(county_id, price) %>%
  group_by(county_id) %>% summarise(price = median(price, na.rm = TRUE), .groups = "drop")
df <- df %>% mutate(price = price / mean(price, na.rm = TRUE))
years <- unique(dft$year)
dfi <- bind_rows(lapply(years, function(y) {
  df %>% mutate(year = y)
}))
df <- dplyr::left_join(dfi, dft, by = "year") %>%
  mutate(soybean   = soy_price_fg * price,
         maize     = maize_price_fg * price,
         wheat     = wheat_price_fg * price,
         sunflower = sunflower_price_fg * price) %>%
  dplyr::select(-price, -soy_price_fg, -maize_price_fg, -wheat_price_fg, -sunflower_price_fg) %>%
  pivot_longer(cols = c(soybean, maize, wheat, sunflower),names_to = "commodity",values_to = "price")

df_c <- read_csv("../../data/landuse/clean/geographicunits_argentina.csv.gz", show_col_types = FALSE) %>%
  dplyr::select(county_id,state_id,region,country)
df_c$spatial_unit = 'county_id'
df <- df_c %>% inner_join(df, by = c("county_id"))
df <- df %>% distinct(year, county_id, commodity, .keep_all = T)
df <- df %>% rename(spatial_id = county_id)  %>% filter(year >1991)
write_csv(df,'../../data/prices/clean/farmgate_crops_argentina_county_id.csv.gz')



###################################################### Cattle prices

#################### Transaction-level prices live cattle prices

df1 <- read_delim("../../data/prices/raw/datos_agroindustriales/sio-carnes-01-ene-a-30-jun-.csv", delim = ";", locale = locale(encoding = "Latin1"), show_col_types = FALSE)
df2 <- read_delim("../../data/prices/raw/datos_agroindustriales/sio-carnes-01-jul-a-30-sep-.csv", delim = ";", locale = locale(encoding = "Latin1"), show_col_types = FALSE)
df <- bind_rows(df1, df2)
df <- df %>%
  mutate(across(everything(), ~ str_replace_all(., c(
    "š" = "U", #"¥" = "N","√ë" = "N", "Ñ" = "N",
    "ZONA" = "ZONE", "Cabeza" = "Head", "Kg. Carne" = "Kg. Meat", "Kg. Vivo" = "Kg. Live"
  ))))
df <- df %>% dplyr::select(-pais_id, -pais)
colnames(df) <- c(
  "province_id", "province", "county_id", "county", "destination_zone",
  "receipt_date", "breed", "category", "heads_traded", "unit",
  "price_per_head", "price_per_live_kg", "number_of_kg"
)
df <- df %>%  mutate(province = str_to_upper(str_trim(province)),county = str_to_upper(str_trim(county)))
df <- df %>%
  mutate(
    province = str_replace_all(province, c("CIUDAD AUTONOMA DE BUENOS AIRES" = "CAPITAL FEDERAL")),
    county = str_replace_all(county, c(
      "1 DE MAYO" = "PRIMERO DE MAYO",
      "12 DE OCTUBRE" = "DOCE DE OCTUBRE",
      "2 DE ABRIL" = "DOS DE ABRIL",
      "25 DE MAYO" = "VEINTICINCO DE MAYO",
      "25 DE  MAYO" = "VEINTICINCO DE MAYO",
      "9 DE JULIO" = "NUEVE DE JULIO",
      "ADOLFO GONZALES CHAVES" = "GONZALES CHAVES",
      "CORONEL BRANDSEN" = "BRANDSEN",
      "CHOS  MALAL" = "CHOS MALAL",
      "DR MANUEL BELGRANO" = "CAPITAL",
      "FLORENTINO AMEGHINO" = "AMEGHINO",
      "GENERAL ANGEL V PENALOZA" = "GENERAL ANGEL V. PENALOZA",
      "JUAN B ALBERDI" = "JUAN B. ALBERDI",
      "JUAN F IBARRA" = "JUAN F. IBARRA",
      "LEANDRO N ALEM" = "LEANDRO N. ALEM",
      "MAYOR LUIS J FONTANA" = "MAYOR LUIS J. FONTANA",
      "PRESIDENTE DE LA PLAZA" = "PRESIDENCIA DE LA PLAZA",
      "GENERAL JUAN MADARIAGA" = "GENERAL MADARIAGA",
      "GUAYASAN" = "GUASAYAN",
      "LA CANDELARIA" = "CANDELARIA",
      "CORONEL DE MARINA L ROSALES" = "CORONEL ROSALES",
      "GENERAL LA MADRID" = "GENERAL LAMADRID",
      "GENERAL JUAN F QUIROGA" = "GENERAL JUAN F. QUIROGA",
      "SENGUERR" = "SENGUER",
      "GENERAL SARMIENTO" = "CAPITAN SARMIENTO",
      "LA CAPITAL" = "CAPITAL",
      "JOSE CLEMENTE PAZ" = "JOSE C. PAZ",
      "SAN MIGUEL DE TUCUMAN" = "CAPITAL",
      "JUAN MARTIN DE PUEYRREDON" = "CAPITAL",
      "SILPICA" = "SILIPICA",
      "LA BANDA" = "BANDA",
      "CHICALCO" = "CHICAL CO",
      "O' HIGGINS" = "O'HIGGINS"
    ))
  )

df <- df %>%
  mutate(
    county = case_when(
      str_starts(county, "ROSARIO VERA") ~ "ROSARIO VERA PENALOZA",
      str_starts(county, "PRESIDENTE ROQUE") ~ "PRESIDENTE ROQUE SAENZ PENA",
      str_ends(county, "UELAS") ~ "CANUELAS",
      str_starts(county, "LANGUI") ~ "LANGUINEO",
      str_starts(county, "LA VI") ~ "LA VINA",
      str_starts(county, "PATI") ~ "PATINO",
      str_starts(county, "A") & str_ends(county, "ELO") ~ "ANELO",
      TRUE ~ county
    )
  )

df <- df %>% filter(county != "S/D",county != "SIN DETERMINAR",county != "TODOS LOS DEPARTAMENTOS")
df <- df %>% mutate(
  timestamp = as.Date(receipt_date, format = "%Y-%m-%d"),
  year = year(timestamp), month = month(timestamp), day = day(timestamp), state = province) %>%
  dplyr::select(state, county, breed, category, heads_traded, unit,
                price_per_head, price_per_live_kg, number_of_kg,timestamp, year, month, day)
df_c <- read_csv("../../data/landuse/clean/geographicunits_argentina.csv.gz", show_col_types = FALSE) %>%
  dplyr::select(county_id, county, state)
df <- df_c %>% inner_join(df, by = c("county", "state"))
write_csv(df,"../../data/prices/clean/farmgate_livecattle_argentina_transaction.csv.gz")



#################### County-level live cattle prices

df <- read_csv('../../data/prices/clean/farmgate_livecattle_argentina_transaction.csv.gz', show_col_types = FALSE)
df <- df %>% mutate(kg_per_head = number_of_kg / heads_traded, price_per_head = price_per_head/30, price_per_live_kg = price_per_live_kg/30)
category_list <- c(
  "Aberdeen Angus",
  "Belted Galloway",
  "Blonde d'aquitaine",
  "Bovino Bueyes",
  "Bovino Criollo",
  "Bovino Machos Enteros Especiales y Buenos",
  "Bovino Machos Enteros Regulares",
  "Bovino Novillitos",
  "Bovino Novillitos Especiales y Buenos Medianos 351/390 kilos",
  "Bovino Novillitos Especiales y Buenos Pesados 391/430 kilos",
  "Bovino Novillitos Regulares",
  "Bovino Novillos",
  "Bovino Novillos Cruza Cebú hasta 440 kilos",
  "Bovino Novillos Cruza Cebú ms 440 kilos",
  "Bovino Novillos Cruza Europea hasta 470 kilos",
  "Bovino Novillos Cruza Europea ms 470 kilos",
  "Bovino Novillos Especiales y Buenos 431/460 kilos",
  "Bovino Novillos Especiales y Buenos 461/490 kilos",
  "Bovino Novillos Especiales y Buenos 491/520 kilos",
  "Bovino Novillos Especiales y Buenos ms 520 kilos",
  "Bovino Novillos Overos Negros ms de 500 kilos",
  "Bovino Novillos Regulares livianos",
  "Bovino Novillos Regulares pesados",
  "Bovino Terneras hasta 350 kilos",
  "Bovino Terneros hasta 350 kilos",
  "Bovino Toros Buenos",
  "Bovino Toros Regulares",
  "Bovino Vaca",
  "Bovino Vaca Buenas",
  "Bovino Vaca Conserva Buena",
  "Bovino Vaca Conserva Inferior",
  "Bovino Vaca Regulares",
  "Bovino Vaca preñada",
  "Bovino Vaquillona",
  "Bovino Vaquillona Especiales y Buenas Medianas 351/390 kilos",
  "Bovino Vaquillona Especiales y Buenas Pesadas 391/430 kilos",
  "Bovino Vaquillona Regulares",
  "Bovino Vaquillona preñada",
  "Braford",
  "Brahman",
  "Brangus",
  "Charolais",
  "Flieckvieh Simmental",
  "Hereford",
  "Holando Argentino",
  "Jersey",
  "Limangus",
  "Limuosin",
  "Piemontese",
  "Polled Hereford",
  "Retinta",
  "Santa Gertrudis",
  "Shorthorn",
  "West Highland"
)
breed_list <- c(
  "Aberdeen Angus",
  "Belted Galloway",
  "Blonde d'aquitaine",
  "Bovino Bueyes",
  "Bovino Criollo",
  "Bovino Machos Enteros Especiales y Buenos",
  "Bovino Machos Enteros Regulares",
  "Bovino Novillitos",
  "Bovino Novillitos Especiales y Buenos Medianos 351/390 kilos",
  "Bovino Novillitos Especiales y Buenos Pesados 391/430 kilos",
  "Bovino Novillitos Regulares",
  "Bovino Novillos",
  "Bovino Novillos Cruza Cebú hasta 440 kilos",
  "Bovino Novillos Cruza Cebú ms 440 kilos",
  "Bovino Novillos Cruza Europea hasta 470 kilos",
  "Bovino Novillos Cruza Europea ms 470 kilos",
  "Bovino Novillos Especiales y Buenos 431/460 kilos",
  "Bovino Novillos Especiales y Buenos 461/490 kilos",
  "Bovino Novillos Especiales y Buenos 491/520 kilos",
  "Bovino Novillos Especiales y Buenos ms 520 kilos",
  "Bovino Novillos Overos Negros ms de 500 kilos",
  "Bovino Novillos Regulares livianos",
  "Bovino Novillos Regulares pesados",
  "Bovino Terneras hasta 350 kilos",
  "Bovino Terneros hasta 350 kilos",
  "Bovino Toros Buenos",
  "Bovino Toros Regulares",
  "Bovino Vaca",
  "Bovino Vaca Buenas",
  "Bovino Vaca Conserva Buena",
  "Bovino Vaca Conserva Inferior",
  "Bovino Vaca Regulares",
  "Bovino Vaca preñada",
  "Bovino Vaquillona",
  "Bovino Vaquillona Especiales y Buenas Medianas 351/390 kilos",
  "Bovino Vaquillona Especiales y Buenas Pesadas 391/430 kilos",
  "Bovino Vaquillona Regulares",
  "Bovino Vaquillona preñada",
  "Braford",
  "Brahman",
  "Brangus",
  "Charolais",
  "Flieckvieh Simmental",
  "Hereford",
  "Holando Argentino",
  "Jersey",
  "Limangus",
  "Limuosin",
  "Piemontese",
  "Polled Hereford",
  "Retinta",
  "Santa Gertrudis",
  "Shorthorn",
  "West Highland"
)
breed_list_indicus <- c(
  "Braford", "Brahman", "Brangus",
  "Bovino Novillos Cruza Cebú hasta 440 kilos",
  "Bovino Novillos Cruza Cebú ms 440 kilos",
  "Santa Gertrudis")
breed_list_taurus <- c(
  "Aberdeen Angus", "Belted Galloway", "Flieckvieh Simmental",
  "Hereford", "Holando Argentino", "Jersey", "Polled Hereford",
  "Shorthorn", "West Highland", "Blonde d'aquitaine", "Charolais",
  "Limuosin", "Piemontese", "Retinta",
  "Bovino Novillos Cruza Europea hasta 470 kilos",
  "Bovino Novillos Cruza Europea ms 470 kilos")
df <- df %>% filter(category %in% category_list) %>% filter(breed %in% breed_list)
dfc <- df

#County-level indicators
dfi <- dfc %>% dplyr::select(county_id, state) %>% group_by(state, county_id) %>% summarise(.groups = "drop")  

#Heads traded
dfh <- dfc %>% dplyr::select(heads_traded, county_id) %>%
  group_by(county_id) %>% summarise(heads_traded = sum(heads_traded, na.rm = TRUE), .groups = "drop")

#Weight per head
dfk <- dfc %>% dplyr::select(kg_per_head, county_id) %>%
  group_by(county_id) %>% summarise(kg_per_head = median(kg_per_head, na.rm = TRUE), .groups = "drop")

#Price per head
dfph <- dfc %>% filter(unit == "Head", price_per_head > 0) %>% dplyr::select(price_per_head, county_id) %>%
  group_by(county_id) %>% summarise(price_per_head = median(price_per_head, na.rm = TRUE), .groups = "drop")

#Price per kg live weight
dfpk <- dfc %>%  filter(unit == "Kg. Live", price_per_live_kg > 0) %>%
  dplyr::select(price_per_live_kg, county_id) %>% group_by(county_id) %>%
  summarise(price_per_live_kg = median(price_per_live_kg, na.rm = TRUE), .groups = "drop")
df <- dfi %>%
  full_join(dfh, by = "county_id") %>%
  full_join(dfk, by = "county_id") %>%
  full_join(dfph, by = "county_id") %>%
  full_join(dfpk, by = "county_id")
dfc <- df %>% dplyr::select(county_id, price_per_head, price_per_live_kg)
write_csv(dfc, "../../data/prices/clean/farmgate_livecattle_argentina_county_id.csv.gz")


#################### Nation-level slaughter prices

df <- read_csv('../../data/prices/raw/datos_agroindustriales/indicadores-ganaderos-anuales.csv', show_col_types = FALSE)
df <- df %>% dplyr::select(-pais_id, -pais)
colnames(df) <- c(
  "year", "total_slaughter", "slaughter_weight", "percent_steers", "percent_females", 
  "production_000_tons", "exports_tons", "exports_usd", "export_price_per_ton_usd", 
  "export_share_of_production", "domestic_consumption_000_tons", "domestic_consumption_kg_per_capita", 
  "steer_price_ars_per_kg_liniers", "steer_price_usd_per_kg_liniers", "steer_weight_liniers")
write_csv(df, '../../data/prices/clean/slaughter_argentina_nation.csv.gz')



###################################################### Cattle profitability indicators

#################### Nation-level indicators

##Profitability indicators
df <- read_csv('../../data/prices/raw/datos_agroindustriales/indicadores-economicos-ganaderia-bovina.csv', show_col_types = FALSE)
colnames(df) <- c(
  "date", "calf_ars_per_kg", "steer_ars_per_kg", "ars_usd_rate", "wholesale_price_index",
  "corn_ars_per_ton", "monthly_wage_ars", "cow_ars_per_head", "heifer_pregnant_ars_per_head",
  "steer_kg_corn_tn_ratio", "steer_kg_calf_kg_ratio", "calf_kg_monthly_wage_ars_ratio",
  "heifers_cows_heads_ratio", "breeding_salado_ars_per_ha", "fattening_subhumeda_ars_per_ha",
  "feedlot_ars_per_head")

df <- df %>% mutate(date = ym(date),year = year(date),month = month(date))
df <- df %>%
  mutate(
    calf_usd_per_kg = calf_ars_per_kg / ars_usd_rate,
    steer_usd_per_kg = steer_ars_per_kg / ars_usd_rate,
    corn_usd_per_ton = corn_ars_per_ton / ars_usd_rate,
    monthly_wage_usd = monthly_wage_ars / ars_usd_rate,
    cow_usd_per_head = cow_ars_per_head / ars_usd_rate,
    heifer_pregnant_usd_per_head = heifer_pregnant_ars_per_head / ars_usd_rate,
    calf_kg_monthly_wage_usd_ratio = calf_kg_monthly_wage_ars_ratio / ars_usd_rate,
    breeding_salado_usd_per_ha = breeding_salado_ars_per_ha / ars_usd_rate,
    fattening_subhumeda_usd_per_ha = fattening_subhumeda_ars_per_ha / ars_usd_rate,
    feedlot_usd_per_head = feedlot_ars_per_head / ars_usd_rate
  )
df <- df %>% dplyr::select(-c(calf_ars_per_kg, steer_ars_per_kg, corn_ars_per_ton, monthly_wage_ars,
                              cow_ars_per_head, heifer_pregnant_ars_per_head, calf_kg_monthly_wage_ars_ratio,
                              breeding_salado_ars_per_ha, fattening_subhumeda_ars_per_ha, feedlot_ars_per_head))
write_csv(df, "../../data/prices/clean/profitability_cattle_argentina_nation.csv.gz")
df_arsusd <- df %>% dplyr::select(year, month, ars_usd_rate)
write_csv(df_arsusd, "../../data/prices/clean/ars_usd_rate_da.csv.gz")


#################### County-level indicators

##Cattle productivity indicators
df <- read_csv('../../data/prices/raw/datos_agroindustriales/produccion-de-carne-bovina.csv', show_col_types = FALSE)
colnames(df) <- c("province_id", "province", "county_id", "county", "province_county_id", "year", "month",
                  "model", "activity", "reference", "margin_ars_per_ha", "net_result_ars_per_ha", "net_income_ars_per_ha",
                  "direct_costs_ars_per_ha", "indirect_costs_ars_per_ha", "stock_efficiency_percent",
                  "production_kg_per_ha", "weaning_rate_percent", "load_kg_per_ha")
df <- df %>% dplyr::select(-province_county_id, -reference)
df <- df %>% filter(county != "SIN DEFINIR") %>%
  mutate(county = recode(county,
                         "1 DE MAYO" = "PRIMERO DE MAYO",
                         "12 DE OCTUBRE" = "DOCE DE OCTUBRE",
                         "2 DE ABRIL" = "DOS DE ABRIL",
                         "25 DE MAYO" = "VEINTICINCO DE MAYO",
                         "25 DE  MAYO" = "VEINTICINCO DE MAYO",
                         "9 DE JULIO" = "NUEVE DE JULIO",
                         "ADOLFO GONZALES CHAVES" = "GONZALES CHAVES",
                         "CHOS  MALAL" = "CHOS MALAL",
                         "DR MANUEL BELGRANO" = "CAPITAL",
                         "FLORENTINO AMEGHINO" = "AMEGHINO",
                         "GENERAL ANGEL V PENALOZA" = "GENERAL ANGEL V. PENALOZA",
                         "JUAN B ALBERDI" = "JUAN B. ALBERDI",
                         "JUAN F IBARRA" = "JUAN F. IBARRA",
                         "LEANDRO N ALEM" = "LEANDRO N. ALEM",
                         "MAYOR LUIS J FONTANA" = "MAYOR LUIS J. FONTANA",
                         "PRESIDENTE DE LA PLAZA" = "PRESIDENCIA DE LA PLAZA",
                         "GENERAL JUAN MADARIAGA" = "GENERAL MADARIAGA",
                         "GUAYASAN" = "GUASAYAN",
                         "LA CANDELARIA" = "CANDELARIA",
                         "CORONEL DE MARINA L ROSALES" = "CORONEL ROSALES",
                         "GENERAL LA MADRID" = "GENERAL LAMADRID",
                         "GENERAL JUAN F QUIROGA" = "GENERAL JUAN F. QUIROGA",
                         "SENGUERR" = "SENGUER",
                         "GENERAL SARMIENTO" = "CAPITAN SARMIENTO",
                         "LA CAPITAL" = "CAPITAL",
                         "JOSE CLEMENTE PAZ" = "JOSE C. PAZ",
                         "SAN MIGUEL DE TUCUMAN" = "CAPITAL",
                         "JUAN MARTIN DE PUEYRREDON" = "CAPITAL",
                         "SILPICA" = "SILIPICA",
                         "LA BANDA" = "BANDA"
  ))
df <- df %>% mutate(activity = recode(activity,"ciclo completo" = "full cycle",
                           "cria" = "breeder","invernada" = "fattening"))
df <- df %>% mutate(weaning_rate_percent = na_if(weaning_rate_percent, -99999)) %>%
  mutate(weaning_rate_percent = na_if(weaning_rate_percent, 0))
df_ars <- read_csv('../../data/prices/clean/ars_usd_rate_da.csv.gz', show_col_types = FALSE)
df <- df %>% left_join(df_ars, by = c("year", "month"))
df <- df %>%
  mutate(margin_usd_per_ha = margin_ars_per_ha / ars_usd_rate,
         net_result_usd_per_ha = net_result_ars_per_ha / ars_usd_rate,
         net_income_usd_per_ha = net_income_ars_per_ha / ars_usd_rate,
         direct_costs_usd_per_ha = direct_costs_ars_per_ha / ars_usd_rate,
         indirect_costs_usd_per_ha = indirect_costs_ars_per_ha / ars_usd_rate,
         state = province)
df <- df %>% dplyr::select(-province_id, -province, -county_id, -ars_usd_rate,
                           -margin_ars_per_ha, -net_result_ars_per_ha, -net_income_ars_per_ha,
                           -direct_costs_ars_per_ha, -indirect_costs_ars_per_ha)
df_c <- read_csv('../../data/landuse/clean/geographicunits_argentina.csv.gz', show_col_types = FALSE) %>%
  dplyr::select(county_id, county, state)
df <- df_c %>% inner_join(df, by = c("county", "state"))
write_csv(df, '../../data/prices/clean/profitability_cattle_argentina_county_id.csv.gz')


###################################################### Final cattle price imputation

####################Time series

df <- read_csv('../../data/prices/clean/profitability_cattle_argentina_nation.csv.gz', show_col_types = FALSE)
df1 <- df %>%
  dplyr::select(year,calf_usd_per_kg,steer_usd_per_kg,corn_usd_per_ton,cow_usd_per_head,
    heifer_pregnant_usd_per_head,breeding_salado_usd_per_ha,
    fattening_subhumeda_usd_per_ha,feedlot_usd_per_head ) %>%
  group_by(year) %>%
  summarise(across(everything(), mean, na.rm = TRUE), .groups = "drop") %>%
  mutate(price_breeder_per_head_t = heifer_pregnant_usd_per_head,
    price_breeder_per_tn_t = steer_usd_per_kg * 1000,
    payoff_salado_per_ha_t = breeding_salado_usd_per_ha) %>%
  dplyr::select(year, price_breeder_per_head_t, price_breeder_per_tn_t, payoff_salado_per_ha_t)
df2 <- read_csv('../../data/prices/clean/land_prices_nation_argentina.csv.gz', show_col_types = FALSE) %>%
  mutate(payoff_ma_per_ha_t = breeding_USD_ha) %>% dplyr::select(year, payoff_ma_per_ha_t)
df3 <- read_csv('../../data/prices/clean/slaughter_argentina_nation.csv.gz', show_col_types = FALSE) %>%
  mutate(price_liniers_per_tn_t = steer_price_usd_per_kg_liniers * 1000,
         price_liniers_per_head_t = steer_price_usd_per_kg_liniers * steer_weight_liniers) %>%
  dplyr::select(year, price_liniers_per_tn_t, price_liniers_per_head_t)
dfts <- df1 %>%
  full_join(df2, by = "year") %>%
  full_join(df3, by = "year") %>%
  dplyr::select(year,price_breeder_per_head_t, price_breeder_per_tn_t,
                price_liniers_per_head_t, price_liniers_per_tn_t,payoff_salado_per_ha_t, payoff_ma_per_ha_t)

#################### Cross section
df <- read_csv('../../data/prices/clean/profitability_cattle_argentina_county_id.csv.gz', show_col_types = FALSE)
df <- df %>% filter(activity == "breeder") %>%
  dplyr::select(county_id,stock_efficiency_percent, weaning_rate_percent, 
                production_kg_per_ha, load_kg_per_ha,margin_usd_per_ha, net_result_usd_per_ha, net_income_usd_per_ha,
                direct_costs_usd_per_ha, indirect_costs_usd_per_ha) %>% group_by(county_id) %>%
  summarise(across(everything(), mean, na.rm = TRUE), .groups = "drop")
dfp <- df
dfc <- read_csv('../../data/prices/clean/farmgate_livecattle_argentina_county_id.csv.gz', show_col_types = FALSE)
df <- dfc %>% full_join(dfp, by = "county_id") %>%
  mutate(price_per_head_i = price_per_head,price_per_tn_i = price_per_live_kg * 1000,
         payoff_per_ha_i = net_income_usd_per_ha)
dfcs <- df %>% dplyr::select(county_id, price_per_head_i, price_per_tn_i, payoff_per_ha_i)

#################### Combine cross section and time series

#Combine
years <- sort(unique(dfts$year))
dfit <- bind_rows(lapply(years, function(y) {
  dfcs %>%
    mutate(year = y)
}))

#Define time series prices
dfts <- dfts %>% mutate(price_per_head_t = price_liniers_per_head_t,
    price_per_tn_t = price_liniers_per_tn_t, payoff_per_ha_t = payoff_ma_per_ha_t) %>% filter(year > 1990)

#Merge time series prices
df <- dfit %>% inner_join(dfts, by = "year")

#Define imputation method
df <- df %>%
  mutate(price_per_head = (price_per_head_i * price_per_head_t) / mean(price_per_head_i, na.rm = TRUE),
         price_per_tn = (price_per_tn_i * price_per_tn_t) / mean(price_per_tn_i, na.rm = TRUE),
         payoff_per_ha = (payoff_per_ha_i * payoff_per_ha_t) / mean(payoff_per_ha_i, na.rm = TRUE) ) %>%
  dplyr::select(year, county_id, price_per_head, price_per_tn, payoff_per_ha) %>%
  pivot_longer(cols = c(price_per_head, price_per_tn, payoff_per_ha),names_to = "value_type",values_to = "value" )

#Geographic units
df_c <- read_csv('../../data/landuse/clean/geographicunits_argentina.csv.gz', show_col_types = FALSE) %>%
  dplyr::select(county_id, state_id, region, country) %>%
  mutate(spatial_unit = "county_id")
df <- df_c %>% inner_join(df, by = "county_id")
df <- df %>% distinct(year, county_id, value_type, .keep_all = T)
df <- df %>% rename(spatial_id = county_id) %>% mutate(establishment_type = "all establishments")
df <- df %>% dplyr::select(spatial_id, state_id, region, country, spatial_unit,year, establishment_type, value_type, value)
write_csv(df,'../../data/prices/clean/farmgate_cattle_argentina_county_id.csv.gz')

